deepresearch-flow 0.4.0__py3-none-any.whl → 0.5.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (25) hide show
  1. deepresearch_flow/paper/db.py +34 -0
  2. deepresearch_flow/paper/web/app.py +106 -1
  3. deepresearch_flow/paper/web/constants.py +5 -4
  4. deepresearch_flow/paper/web/handlers/__init__.py +2 -1
  5. deepresearch_flow/paper/web/handlers/api.py +55 -0
  6. deepresearch_flow/paper/web/handlers/pages.py +105 -25
  7. deepresearch_flow/paper/web/markdown.py +60 -0
  8. deepresearch_flow/paper/web/pdfjs/web/viewer.html +57 -5
  9. deepresearch_flow/paper/web/pdfjs/web/viewer.js +5 -1
  10. deepresearch_flow/paper/web/static/js/detail.js +494 -125
  11. deepresearch_flow/paper/web/static/js/outline.js +48 -34
  12. deepresearch_flow/paper/web/static_assets.py +289 -0
  13. deepresearch_flow/paper/web/templates/detail.html +46 -69
  14. deepresearch_flow/paper/web/templates/index.html +3 -3
  15. deepresearch_flow/paper/web/templates.py +7 -4
  16. deepresearch_flow/recognize/cli.py +805 -26
  17. deepresearch_flow/recognize/katex_check.js +29 -0
  18. deepresearch_flow/recognize/math.py +719 -0
  19. deepresearch_flow/recognize/mermaid.py +690 -0
  20. {deepresearch_flow-0.4.0.dist-info → deepresearch_flow-0.5.0.dist-info}/METADATA +117 -4
  21. {deepresearch_flow-0.4.0.dist-info → deepresearch_flow-0.5.0.dist-info}/RECORD +25 -21
  22. {deepresearch_flow-0.4.0.dist-info → deepresearch_flow-0.5.0.dist-info}/WHEEL +0 -0
  23. {deepresearch_flow-0.4.0.dist-info → deepresearch_flow-0.5.0.dist-info}/entry_points.txt +0 -0
  24. {deepresearch_flow-0.4.0.dist-info → deepresearch_flow-0.5.0.dist-info}/licenses/LICENSE +0 -0
  25. {deepresearch_flow-0.4.0.dist-info → deepresearch_flow-0.5.0.dist-info}/top_level.txt +0 -0
@@ -600,6 +600,32 @@ def register_db_commands(db_group: click.Group) -> None:
600
600
  )
601
601
  @click.option("--cache-dir", "cache_dir", default=None, help="Cache directory for merged inputs")
602
602
  @click.option("--no-cache", "no_cache", is_flag=True, help="Disable cache for db serve")
603
+ @click.option(
604
+ "--static-base-url",
605
+ "static_base_url",
606
+ default=None,
607
+ help="Static asset base URL (e.g. https://static.example.com)",
608
+ )
609
+ @click.option(
610
+ "--static-mode",
611
+ "static_mode",
612
+ type=click.Choice(["auto", "dev", "prod"]),
613
+ default="auto",
614
+ show_default=True,
615
+ help="Static asset mode (dev uses local assets, prod uses static base URL)",
616
+ )
617
+ @click.option(
618
+ "--static-export-dir",
619
+ "static_export_dir",
620
+ default=None,
621
+ help="Optional export directory for hashed static assets",
622
+ )
623
+ @click.option(
624
+ "--pdfjs-cdn-base-url",
625
+ "pdfjs_cdn_base_url",
626
+ default=None,
627
+ help="PDF.js CDN base URL (defaults to jsDelivr)",
628
+ )
603
629
  @click.option("--host", default="127.0.0.1", show_default=True, help="Bind host")
604
630
  @click.option("--port", default=8000, type=int, show_default=True, help="Bind port")
605
631
  @click.option(
@@ -617,6 +643,10 @@ def register_db_commands(db_group: click.Group) -> None:
617
643
  pdf_roots: tuple[str, ...],
618
644
  cache_dir: str | None,
619
645
  no_cache: bool,
646
+ static_base_url: str | None,
647
+ static_mode: str,
648
+ static_export_dir: str | None,
649
+ pdfjs_cdn_base_url: str | None,
620
650
  host: str,
621
651
  port: int,
622
652
  fallback_language: str,
@@ -635,6 +665,10 @@ def register_db_commands(db_group: click.Group) -> None:
635
665
  pdf_roots=[Path(root) for root in pdf_roots],
636
666
  cache_dir=Path(cache_dir) if cache_dir else None,
637
667
  use_cache=not no_cache,
668
+ static_base_url=static_base_url,
669
+ static_mode=static_mode,
670
+ static_export_dir=Path(static_export_dir) if static_export_dir else None,
671
+ pdfjs_cdn_base_url=pdfjs_cdn_base_url,
638
672
  )
639
673
  except Exception as exc:
640
674
  raise click.ClickException(str(exc)) from exc
@@ -1,6 +1,7 @@
1
1
  from __future__ import annotations
2
2
 
3
3
  import logging
4
+ import os
4
5
  from pathlib import Path
5
6
 
6
7
  from starlette.applications import Starlette
@@ -10,8 +11,9 @@ from starlette.routing import Mount, Route
10
11
  from starlette.staticfiles import StaticFiles
11
12
 
12
13
  from deepresearch_flow.paper.db_ops import build_index, load_and_merge_papers
13
- from deepresearch_flow.paper.web.constants import PDFJS_STATIC_DIR, STATIC_DIR
14
+ from deepresearch_flow.paper.web.constants import DEFAULT_PDFJS_CDN_BASE_URL, PDFJS_STATIC_DIR, STATIC_DIR
14
15
  from deepresearch_flow.paper.web.handlers import (
16
+ api_markdown,
15
17
  api_papers,
16
18
  api_pdf,
17
19
  api_stats,
@@ -21,6 +23,7 @@ from deepresearch_flow.paper.web.handlers import (
21
23
  stats_page,
22
24
  )
23
25
  from deepresearch_flow.paper.web.markdown import create_md_renderer
26
+ from deepresearch_flow.paper.web.static_assets import build_static_assets
24
27
 
25
28
  logger = logging.getLogger(__name__)
26
29
 
@@ -32,6 +35,35 @@ class _NoIndexMiddleware(BaseHTTPMiddleware):
32
35
  return response
33
36
 
34
37
 
38
class _StaticAssetFiles(StaticFiles):
    """``StaticFiles`` subclass that can add a ``Cache-Control`` header to 200 responses."""

    def __init__(self, *args, cache_control: str | None = None, **kwargs) -> None:
        """Forward every other argument to ``StaticFiles`` and remember ``cache_control``.

        Args:
            cache_control: Header value to apply to successful responses;
                a falsy value disables the header handling entirely.
        """
        super().__init__(*args, **kwargs)
        self._cache_control = cache_control

    async def get_response(self, path: str, scope):  # type: ignore[override]
        """Return the parent's response, tagging it with ``Cache-Control`` when applicable."""
        served = await super().get_response(path, scope)
        header_value = self._cache_control
        if not header_value or served.status_code != 200:
            # Nothing configured, or not a plain 200: hand the response back untouched.
            return served
        # setdefault keeps any Cache-Control value the parent already set.
        served.headers.setdefault("Cache-Control", header_value)
        return served
48
+
49
+
50
def _normalize_static_mode(value: str | None) -> str:
    """Map a user-supplied static mode onto ``"dev"``, ``"prod"`` or ``"auto"``.

    Matching ignores surrounding whitespace and letter case. ``"development"``
    and ``"production"`` are accepted as long forms. A falsy or unrecognised
    value yields ``"auto"``.
    """
    aliases = {
        "dev": "dev",
        "development": "dev",
        "prod": "prod",
        "production": "prod",
    }
    key = value.strip().lower() if value else ""
    return aliases.get(key, "auto")
59
+
60
+
61
def _resolve_static_mode(value: str, static_base_url: str | None) -> str:
    """Turn ``"auto"`` into a concrete mode; return any other value unchanged.

    ``"auto"`` resolves to ``"prod"`` when ``static_base_url`` is truthy and to
    ``"dev"`` otherwise.
    """
    if value != "auto":
        return value
    if static_base_url:
        return "prod"
    return "dev"
65
+
66
+
35
67
  def create_app(
36
68
  *,
37
69
  db_paths: list[Path],
@@ -42,6 +74,10 @@ def create_app(
42
74
  pdf_roots: list[Path] | None = None,
43
75
  cache_dir: Path | None = None,
44
76
  use_cache: bool = True,
77
+ static_base_url: str | None = None,
78
+ static_mode: str | None = None,
79
+ static_export_dir: Path | None = None,
80
+ pdfjs_cdn_base_url: str | None = None,
45
81
  ) -> Starlette:
46
82
  papers = load_and_merge_papers(db_paths, bibtex_path, cache_dir, use_cache, pdf_roots=pdf_roots)
47
83
 
@@ -55,6 +91,44 @@ def create_app(
55
91
  pdf_roots=pdf_roots,
56
92
  )
57
93
  md = create_md_renderer()
94
+ static_base_url = static_base_url or os.getenv("PAPER_DB_STATIC_BASE_URL")
95
+ static_mode = _normalize_static_mode(static_mode or os.getenv("PAPER_DB_STATIC_MODE"))
96
+ resolved_mode = _resolve_static_mode(static_mode, static_base_url)
97
+ export_dir_value = static_export_dir or os.getenv("PAPER_DB_STATIC_EXPORT_DIR")
98
+ export_dir = Path(export_dir_value) if export_dir_value else None
99
+ pdfjs_cdn_base_url = (
100
+ pdfjs_cdn_base_url
101
+ or os.getenv("PAPER_DB_PDFJS_CDN_BASE_URL")
102
+ or DEFAULT_PDFJS_CDN_BASE_URL
103
+ )
104
+ if pdfjs_cdn_base_url:
105
+ lowered = pdfjs_cdn_base_url.strip().lower()
106
+ if lowered in {"none", "off", "local"}:
107
+ pdfjs_cdn_base_url = None
108
+ else:
109
+ pdfjs_cdn_base_url = pdfjs_cdn_base_url.rstrip("/")
110
+
111
+ asset_config = None
112
+ if resolved_mode == "prod":
113
+ if not static_base_url:
114
+ logger.warning("Static mode set to prod without base URL; falling back to dev asset routes.")
115
+ resolved_mode = "dev"
116
+ else:
117
+ asset_config = build_static_assets(
118
+ index,
119
+ static_base_url=static_base_url,
120
+ static_export_dir=export_dir,
121
+ )
122
+ if resolved_mode == "dev" and export_dir:
123
+ asset_config = build_static_assets(
124
+ index,
125
+ static_base_url="",
126
+ static_export_dir=export_dir,
127
+ allow_empty_base=True,
128
+ )
129
+ if asset_config is None:
130
+ asset_config = build_static_assets(index, static_base_url=None)
131
+
58
132
  routes = [
59
133
  Route("/", index_page, methods=["GET"]),
60
134
  Route("/robots.txt", robots_txt, methods=["GET"]),
@@ -63,6 +137,7 @@ def create_app(
63
137
  Route("/api/papers", api_papers, methods=["GET"]),
64
138
  Route("/api/stats", api_stats, methods=["GET"]),
65
139
  Route("/api/pdf/{source_hash:str}", api_pdf, methods=["GET"]),
140
+ Route("/api/dev/markdown/{source_hash:str}", api_markdown, methods=["GET"]),
66
141
  ]
67
142
  if PDFJS_STATIC_DIR.exists():
68
143
  routes.append(
@@ -85,10 +160,40 @@ def create_app(
85
160
  name="static",
86
161
  )
87
162
  )
163
+ if export_dir and export_dir.exists() and asset_config.enabled and not asset_config.base_url:
164
+ cache_header = "public, max-age=31536000, immutable"
165
+ routes.extend(
166
+ [
167
+ Mount(
168
+ "/pdf",
169
+ app=_StaticAssetFiles(directory=str(export_dir / "pdf"), cache_control=cache_header),
170
+ name="static_pdf",
171
+ ),
172
+ Mount(
173
+ "/images",
174
+ app=_StaticAssetFiles(directory=str(export_dir / "images"), cache_control=cache_header),
175
+ name="static_images",
176
+ ),
177
+ Mount(
178
+ "/md",
179
+ app=_StaticAssetFiles(directory=str(export_dir / "md"), cache_control=cache_header),
180
+ name="static_md",
181
+ ),
182
+ Mount(
183
+ "/md_translate",
184
+ app=_StaticAssetFiles(directory=str(export_dir / "md_translate"), cache_control=cache_header),
185
+ name="static_md_translate",
186
+ ),
187
+ ]
188
+ )
88
189
  app = Starlette(routes=routes)
89
190
  app.add_middleware(_NoIndexMiddleware)
90
191
  app.state.index = index
91
192
  app.state.md = md
92
193
  app.state.fallback_language = fallback_language
93
194
  app.state.pdf_roots = pdf_roots
195
+ app.state.static_mode = resolved_mode
196
+ app.state.asset_config = asset_config
197
+ app.state.static_export_dir = export_dir
198
+ app.state.pdfjs_cdn_base_url = pdfjs_cdn_base_url
94
199
  return app
@@ -4,14 +4,15 @@ from pathlib import Path
4
4
 
5
5
  # CDN URLs for external libraries
6
6
  CDN_ECHARTS = "https://cdn.jsdelivr.net/npm/echarts@5/dist/echarts.min.js"
7
- CDN_MERMAID = "https://cdn.jsdelivr.net/npm/mermaid@10/dist/mermaid.min.js"
8
- CDN_KATEX = "https://cdn.jsdelivr.net/npm/katex@0.16.10/dist/katex.min.css"
9
- CDN_KATEX_JS = "https://cdn.jsdelivr.net/npm/katex@0.16.10/dist/katex.min.js"
10
- CDN_KATEX_AUTO = "https://cdn.jsdelivr.net/npm/katex@0.16.10/dist/contrib/auto-render.min.js"
7
+ CDN_MERMAID = "https://cdn.jsdelivr.net/npm/mermaid@11/dist/mermaid.min.js"
8
+ CDN_KATEX = "https://cdn.jsdelivr.net/npm/katex@0.16.27/dist/katex.min.css"
9
+ CDN_KATEX_JS = "https://cdn.jsdelivr.net/npm/katex@0.16.27/dist/katex.min.js"
10
+ CDN_KATEX_AUTO = "https://cdn.jsdelivr.net/npm/katex@0.16.27/dist/contrib/auto-render.min.js"
11
11
 
12
12
  # Use legacy builds to ensure `pdfjsLib` is available as a global.
13
13
  CDN_PDFJS = "https://cdn.jsdelivr.net/npm/pdfjs-dist@3.11.174/legacy/build/pdf.min.js"
14
14
  CDN_PDFJS_WORKER = "https://cdn.jsdelivr.net/npm/pdfjs-dist@3.11.174/legacy/build/pdf.worker.min.js"
15
+ DEFAULT_PDFJS_CDN_BASE_URL = "https://cdn.jsdelivr.net/npm/pdfjs-dist@3.11.174"
15
16
 
16
17
  # PDF.js viewer configuration
17
18
  PDFJS_VIEWER_PATH = "/pdfjs/web/viewer.html"
@@ -1,12 +1,13 @@
1
1
  """Route handlers for paper web UI."""
2
2
 
3
- from .api import api_papers, api_pdf, api_stats
3
+ from .api import api_markdown, api_papers, api_pdf, api_stats
4
4
  from .pages import index_page, paper_detail, robots_txt, stats_page
5
5
 
6
6
  __all__ = [
7
7
  "api_papers",
8
8
  "api_pdf",
9
9
  "api_stats",
10
+ "api_markdown",
10
11
  "index_page",
11
12
  "paper_detail",
12
13
  "robots_txt",
@@ -19,6 +19,8 @@ from deepresearch_flow.paper.web.filters import (
19
19
  presence_filter,
20
20
  sorted_ids,
21
21
  )
22
+ from deepresearch_flow.paper.web.markdown import normalize_markdown_images
23
+ from deepresearch_flow.paper.web.static_assets import resolve_asset_urls
22
24
  from deepresearch_flow.paper.web.text import extract_summary_snippet, normalize_title, normalize_venue
23
25
  from deepresearch_flow.paper.web.query import Query, QueryTerm, parse_query
24
26
 
@@ -92,9 +94,18 @@ def _apply_query(index: PaperIndex, query: Query) -> set[int]:
92
94
  return result
93
95
 
94
96
 
97
def _safe_read_text(path: Path) -> str:
    """Read ``path`` as UTF-8 text, retrying as Latin-1 if UTF-8 decoding fails."""
    try:
        content = path.read_text(encoding="utf-8")
    except UnicodeDecodeError:
        # Latin-1 maps every byte to a character, so this retry cannot fail on decoding.
        content = path.read_text(encoding="latin-1")
    return content
102
+
103
+
95
104
  async def api_papers(request: Request) -> JSONResponse:
96
105
  """API endpoint for paper list with filtering, sorting, and pagination."""
97
106
  index: PaperIndex = request.app.state.index
107
+ asset_config = request.app.state.asset_config
108
+ prefer_local = request.app.state.static_mode == "dev"
98
109
  filters = parse_filters(request)
99
110
  page = int(filters["page"])
100
111
  page_size = int(filters["page_size"])
@@ -165,6 +176,7 @@ async def api_papers(request: Request) -> JSONResponse:
165
176
  source_hash = str(paper.get("source_hash") or stable_hash(str(paper.get("source_path") or idx)))
166
177
  translations = index.translated_md_by_hash.get(source_hash, {})
167
178
  translation_languages = sorted(translations.keys(), key=str.lower)
179
+ asset_urls = resolve_asset_urls(index, source_hash, asset_config, prefer_local=prefer_local)
168
180
  items.append(
169
181
  {
170
182
  "source_hash": source_hash,
@@ -183,6 +195,10 @@ async def api_papers(request: Request) -> JSONResponse:
183
195
  "has_summary": bool(paper.get("_has_summary")),
184
196
  "is_pdf_only": bool(paper.get("_is_pdf_only")),
185
197
  "translation_languages": translation_languages,
198
+ "pdf_url": asset_urls["pdf_url"],
199
+ "md_url": asset_urls["md_url"],
200
+ "md_translated_url": asset_urls["md_translated_url"],
201
+ "images_base_url": asset_urls["images_base_url"],
186
202
  }
187
203
  )
188
204
 
@@ -215,3 +231,42 @@ async def api_pdf(request: Request) -> Response:
215
231
  if allowed_roots and not _ensure_under_roots(pdf_path, allowed_roots):
216
232
  return Response("Forbidden", status_code=403)
217
233
  return FileResponse(pdf_path)
234
+
235
+
236
async def api_markdown(request: Request) -> Response:
    """Dev-only API endpoint to serve raw markdown content.

    Responds 404 unless the app's static mode is ``"dev"``. The optional
    ``lang`` query parameter selects a translated document (looked up in
    lower case); without it the source markdown is served.

    Lookup order:
      1. When an export directory is configured and the asset config is
         enabled with an empty base URL, the exported copy is served if the
         file exists on disk.
      2. Otherwise the path recorded in the paper index is read; translated
         markdown is passed through ``normalize_markdown_images`` first.
    """
    state = request.app.state
    if state.static_mode != "dev":
        return Response("Not Found", status_code=404)
    index: PaperIndex = state.index
    asset_config = state.asset_config
    export_dir = state.static_export_dir
    source_hash = request.path_params["source_hash"]
    lang = request.query_params.get("lang")

    if export_dir and asset_config and asset_config.enabled and (asset_config.base_url or "") == "":
        if lang:
            exported_url = asset_config.translated_md_urls.get(source_hash, {}).get(lang.lower())
        else:
            exported_url = asset_config.md_urls.get(source_hash)
        if exported_url:
            # Asset URLs are rooted at "/", so strip it to join under the export directory.
            export_path = export_dir / exported_url.lstrip("/")
            if export_path.exists():
                return Response(_safe_read_text(export_path), media_type="text/markdown")

    # No usable exported copy: fall back to the path stored in the index.
    if lang:
        md_path = index.translated_md_by_hash.get(source_hash, {}).get(lang.lower())
    else:
        md_path = index.md_path_by_hash.get(source_hash)
    if not md_path:
        return Response("Markdown not found", status_code=404)
    body = _safe_read_text(md_path)
    if lang:
        body = normalize_markdown_images(body)
    return Response(body, media_type="text/markdown")
@@ -3,6 +3,7 @@
3
3
  from __future__ import annotations
4
4
 
5
5
  import html
6
+ from pathlib import Path
6
7
  from urllib.parse import urlencode
7
8
 
8
9
  from starlette.requests import Request
@@ -16,6 +17,7 @@ from deepresearch_flow.paper.web.markdown import (
16
17
  render_paper_markdown,
17
18
  select_template_tag,
18
19
  )
20
+ from deepresearch_flow.paper.web.static_assets import resolve_asset_urls
19
21
  from deepresearch_flow.paper.web.text import normalize_title
20
22
  from deepresearch_flow.paper.web.templates import (
21
23
  build_pdfjs_viewer_url,
@@ -23,6 +25,47 @@ from deepresearch_flow.paper.web.templates import (
23
25
  )
24
26
 
25
27
 
28
def _safe_read_text(path: Path) -> str:
    """Return the text of ``path``, decoding as UTF-8 and falling back to Latin-1."""
    try:
        decoded = path.read_text(encoding="utf-8")
    except UnicodeDecodeError:
        # Second attempt with Latin-1, which accepts any byte sequence.
        decoded = path.read_text(encoding="latin-1")
    return decoded
33
+
34
+
35
def _load_markdown_for_view(
    index: PaperIndex,
    asset_config,
    export_dir: Path | None,
    source_hash: str,
    *,
    lang: str | None = None,
) -> str | None:
    """Load the markdown text to render for a paper, or ``None`` if unavailable.

    Args:
        index: Paper index providing ``md_path_by_hash`` and
            ``translated_md_by_hash``.
        asset_config: Static asset configuration; may be falsy.
        export_dir: Directory holding exported assets, if any.
        source_hash: Key identifying the paper.
        lang: Language of the translated markdown to load (looked up in
            lower case); when omitted the source markdown is loaded.

    Returns:
        The exported copy when an export directory is configured, the asset
        config is enabled with an empty base URL, and the exported file
        exists. Otherwise the file recorded in the index, with translated
        text passed through ``normalize_markdown_images``. ``None`` when the
        index has no path for the request.
    """
    use_exported_copy = (
        export_dir
        and asset_config
        and asset_config.enabled
        and (asset_config.base_url or "") == ""
    )
    if use_exported_copy:
        if lang:
            exported_url = asset_config.translated_md_urls.get(source_hash, {}).get(lang.lower())
        else:
            exported_url = asset_config.md_urls.get(source_hash)
        if exported_url:
            # Asset URLs are rooted at "/", so strip it to join under the export directory.
            candidate = export_dir / exported_url.lstrip("/")
            if candidate.exists():
                return _safe_read_text(candidate)

    if lang:
        indexed_path = index.translated_md_by_hash.get(source_hash, {}).get(lang.lower())
    else:
        indexed_path = index.md_path_by_hash.get(source_hash)
    if not indexed_path:
        return None
    text = _safe_read_text(indexed_path)
    return normalize_markdown_images(text) if lang else text
67
+
68
+
26
69
  async def robots_txt(_: Request) -> Response:
27
70
  """Serve robots.txt to disallow all crawlers."""
28
71
  return Response("User-agent: *\nDisallow: /\n", media_type="text/plain")
@@ -78,7 +121,13 @@ async def paper_detail(request: Request) -> HTMLResponse:
78
121
  embed = request.query_params.get("embed") == "1"
79
122
 
80
123
  pdf_path = index.pdf_path_by_hash.get(source_hash)
81
- pdf_url = f"/api/pdf/{source_hash}"
124
+ asset_urls = resolve_asset_urls(
125
+ index,
126
+ source_hash,
127
+ request.app.state.asset_config,
128
+ prefer_local=request.app.state.static_mode == "dev",
129
+ )
130
+ pdf_url = asset_urls["pdf_url"] or ""
82
131
  source_available = source_hash in index.md_path_by_hash
83
132
  translations = index.translated_md_by_hash.get(source_hash, {})
84
133
  translation_langs = sorted(translations.keys(), key=str.lower)
@@ -156,14 +205,18 @@ async def paper_detail(request: Request) -> HTMLResponse:
156
205
 
157
206
  # Initialize template variables
158
207
  body_html = ""
159
- raw_content = ""
160
208
  summary_template_name = ""
161
209
  template_warning = ""
162
210
  template_controls = ""
163
211
  source_path_str = ""
164
212
  translated_path_str = ""
213
+ source_markdown_url = ""
214
+ translated_markdown_url = ""
215
+ images_base_url = asset_urls["images_base_url"] or ""
165
216
  pdf_filename = ""
166
217
  pdfjs_url = ""
218
+ pdfjs_script_url = ""
219
+ pdfjs_worker_url = ""
167
220
  left_src = ""
168
221
  right_src = ""
169
222
  split_options: list[tuple[str, str]] = []
@@ -208,21 +261,27 @@ if (templateSelect) {{
208
261
  </script>
209
262
  """
210
263
 
264
+ prefer_local = request.app.state.static_mode == "dev"
265
+
211
266
  # Source view
212
267
  if view == "source":
213
268
  source_path = index.md_path_by_hash.get(source_hash)
214
- if not source_path:
269
+ if not source_path or not asset_urls["md_url"]:
215
270
  body_html = '<div class="warning">Source markdown not found. Provide --md-root to enable source viewing.</div>'
216
271
  else:
217
- try:
218
- raw = source_path.read_text(encoding="utf-8")
219
- except UnicodeDecodeError:
220
- raw = source_path.read_text(encoding="latin-1")
221
- md_renderer = create_md_renderer()
222
- body_html = render_markdown_with_math_placeholders(md_renderer, raw)
223
- raw_content = raw
272
+ source_markdown_url = asset_urls["md_url"] or ""
224
273
  source_path_str = str(source_path)
225
274
  show_outline = True
275
+ if prefer_local:
276
+ raw = _load_markdown_for_view(
277
+ index,
278
+ request.app.state.asset_config,
279
+ request.app.state.static_export_dir,
280
+ source_hash,
281
+ )
282
+ if raw is not None:
283
+ md_renderer = create_md_renderer()
284
+ body_html = render_markdown_with_math_placeholders(md_renderer, raw)
226
285
 
227
286
  # Translated view
228
287
  if view == "translated":
@@ -230,38 +289,55 @@ if (templateSelect) {{
230
289
  body_html = '<div class="warning">No translated markdown found. Provide <code>--md-translated-root</code> and place <code><base>.<lang>.md</code> under that root.</div>'
231
290
  else:
232
291
  translated_path = translations.get(selected_lang)
233
- if not translated_path:
292
+ translated_markdown_url = asset_urls["md_translated_url"].get(selected_lang, "")
293
+ if not translated_path or not translated_markdown_url:
234
294
  body_html = '<div class="warning">Translated markdown not found for the selected language.</div>'
235
295
  else:
236
- try:
237
- raw = translated_path.read_text(encoding="utf-8")
238
- except UnicodeDecodeError:
239
- raw = translated_path.read_text(encoding="latin-1")
240
- raw = normalize_markdown_images(raw)
241
- md_renderer = create_md_renderer()
242
- body_html = render_markdown_with_math_placeholders(md_renderer, raw)
243
- raw_content = raw
244
296
  translated_path_str = str(translated_path)
245
297
  show_outline = True
298
+ if prefer_local:
299
+ raw = _load_markdown_for_view(
300
+ index,
301
+ request.app.state.asset_config,
302
+ request.app.state.static_export_dir,
303
+ source_hash,
304
+ lang=selected_lang,
305
+ )
306
+ if raw is not None:
307
+ md_renderer = create_md_renderer()
308
+ body_html = render_markdown_with_math_placeholders(md_renderer, raw)
246
309
 
247
310
  # PDF view
248
311
  if view == "pdf":
249
- if not pdf_path:
312
+ if not pdf_path or not pdf_url:
250
313
  body_html = '<div class="warning">PDF not found. Provide --pdf-root to enable PDF viewing.</div>'
251
314
  pdf_filename = str(pdf_path.name) if pdf_path else ""
315
+ pdfjs_cdn_base_url = request.app.state.pdfjs_cdn_base_url
316
+ if pdfjs_cdn_base_url:
317
+ pdfjs_script_url = f"{pdfjs_cdn_base_url}/legacy/build/pdf.min.js"
318
+ pdfjs_worker_url = f"{pdfjs_cdn_base_url}/legacy/build/pdf.worker.min.js"
319
+ else:
320
+ pdfjs_script_url = "/pdfjs/build/pdf.js"
321
+ pdfjs_worker_url = "/pdfjs/build/pdf.worker.js"
252
322
 
253
323
  # PDF.js view
254
324
  if view == "pdfjs":
255
- if not pdf_path:
325
+ if not pdf_path or not pdf_url:
256
326
  body_html = '<div class="warning">PDF not found. Provide --pdf-root to enable PDF viewing.</div>'
257
- pdfjs_url = build_pdfjs_viewer_url(pdf_url)
327
+ pdfjs_url = build_pdfjs_viewer_url(
328
+ pdf_url,
329
+ cdn_base_url=request.app.state.pdfjs_cdn_base_url,
330
+ )
258
331
  pdf_filename = str(pdf_path.name) if pdf_path else ""
259
332
 
260
333
  # Split view
261
334
  if view == "split":
262
335
  def pane_src(pane_view: str) -> str:
263
- if pane_view == "pdfjs" and pdf_path:
264
- return build_pdfjs_viewer_url(pdf_url)
336
+ if pane_view == "pdfjs" and pdf_path and pdf_url:
337
+ return build_pdfjs_viewer_url(
338
+ pdf_url,
339
+ cdn_base_url=request.app.state.pdfjs_cdn_base_url,
340
+ )
265
341
  params: dict[str, str] = {"view": pane_view, "embed": "1"}
266
342
  if pane_view == "summary" and template_param:
267
343
  params["template"] = str(template_param)
@@ -307,12 +383,14 @@ if (templateSelect) {{
307
383
  show_outline=show_outline,
308
384
  # Content variables
309
385
  body_html=body_html,
310
- raw_content=raw_content,
311
386
  summary_template_name=summary_template_name,
312
387
  template_warning=template_warning,
313
388
  template_controls=template_controls,
314
389
  available_templates=available_templates,
315
390
  selected_template_tag=selected_tag,
391
+ images_base_url=images_base_url,
392
+ source_markdown_url=source_markdown_url,
393
+ translated_markdown_url=translated_markdown_url,
316
394
  # Source view
317
395
  source_path=source_path_str,
318
396
  # Translated view
@@ -322,6 +400,8 @@ if (templateSelect) {{
322
400
  # PDF view
323
401
  pdf_filename=pdf_filename,
324
402
  pdf_url=pdf_url,
403
+ pdfjs_script_url=pdfjs_script_url,
404
+ pdfjs_worker_url=pdfjs_worker_url,
325
405
  # PDF.js view
326
406
  pdfjs_url=pdfjs_url,
327
407
  # Split view
@@ -96,6 +96,64 @@ def normalize_markdown_images(text: str) -> str:
96
96
  return "\n".join(out)
97
97
 
98
98
 
99
def normalize_fenced_code_blocks(text: str) -> str:
    """Ensure fenced code block markers appear on their own lines.

    For each line, the first run of three or more backticks or tildes is
    located. If ordinary text precedes it on the same line, the line is split
    so the fence starts a new line. Lines where the fence is preceded only by
    whitespace are left untouched.

    A prefix made up solely of blockquote (``>``) or list (``-``, ``+``,
    ``*``, ``1.``, ``1)``) markers is also left untouched: the fence there
    belongs to that container, and splitting would move it out of the
    container and leave a stray marker line behind.

    Note:
        Lines are re-joined with ``"\\n"``, so a trailing newline in ``text``
        is not preserved.
    """
    fence_re = re.compile(r"(`{3,}|~{3,})")
    # Optional indent followed by one or more block-container markers.
    container_prefix_re = re.compile(r"[ \t]*(?:>[ \t]*|(?:[-+*]|\d{1,9}[.)])[ \t]+)+")
    out: list[str] = []
    for line in text.splitlines():
        match = fence_re.search(line)
        if not match:
            out.append(line)
            continue
        prefix = line[: match.start()]
        if not prefix.strip() or container_prefix_re.fullmatch(prefix):
            # Fence already starts the line, or starts the content of its container.
            out.append(line)
            continue
        out.append(prefix.rstrip())
        out.append(line[match.start() :].lstrip())
    return "\n".join(out)
116
+
117
+
118
def normalize_unbalanced_fences(text: str) -> str:
    """Drop an unmatched opening fence so later content still renders.

    Lines are scanned top to bottom while tracking whether a fenced code
    block is open. If a block is still open at the end of the text, the line
    that opened it is removed, so the rest is no longer swallowed as code.

    Fence detection follows the CommonMark rules that affect balancing:

    * a fence is three or more of the *same* character (backticks or tildes),
      indented by at most three spaces;
    * a backtick fence whose info string contains a backtick is not a fence
      (it is inline code);
    * a closing fence uses the opener's character, is at least as long as the
      opener, and carries no info string. A line such as ```` ```python ````
      inside an open block is therefore content, not a closer.

    Note:
        Lines are re-joined with ``"\\n"``, so a trailing newline in ``text``
        is not preserved.
    """
    fence_re = re.compile(r"(`{3,}|~{3,})(.*)$")
    out: list[str] = []
    open_char = ""
    open_len = 0
    open_index = -1  # index in ``out`` of the open fence line; -1 when no block is open

    for line in text.splitlines():
        stripped = line.lstrip(" ")
        indent = len(line) - len(stripped)
        match = fence_re.match(stripped) if indent <= 3 else None
        if match:
            run, rest = match.group(1), match.group(2)
            if open_index < 0:
                # Candidate opener; backtick info strings may not contain backticks.
                if not (run[0] == "`" and "`" in rest):
                    open_char, open_len, open_index = run[0], len(run), len(out)
            elif run[0] == open_char and len(run) >= open_len and not rest.strip():
                open_char, open_len, open_index = "", 0, -1
        out.append(line)

    if open_index >= 0:
        out.pop(open_index)
    return "\n".join(out)
155
+
156
+
99
157
  def extract_math_placeholders(text: str) -> tuple[str, dict[str, str]]:
100
158
  """Extract math expressions and replace with placeholders."""
101
159
  placeholders: dict[str, str] = {}
@@ -476,6 +534,8 @@ def extract_html_table_placeholders(text: str) -> tuple[str, dict[str, str]]:
476
534
 
477
535
  def render_markdown_with_math_placeholders(md: MarkdownIt, text: str) -> str:
478
536
  """Render markdown with math, images, and tables properly escaped."""
537
+ text = normalize_fenced_code_blocks(text)
538
+ text = normalize_unbalanced_fences(text)
479
539
  text = strip_paragraph_wrapped_tables(text)
480
540
  text = normalize_footnote_definitions(text)
481
541
  rendered, table_placeholders = extract_html_table_placeholders(text)